
# HETA_dataset.py
# A module to load datasets used in HETA: Long-Range Agreement, TellMeWhy, and WikiBio.

from datasets import load_dataset

def load_longra(repo_id: str = "your-dataset-repo/longra"):
    """
    Load the Long-Range Agreement (LongRA) dataset.

    Args:
        repo_id: Dataset identifier passed unchanged to ``load_dataset``.
            The default is a placeholder, not a real repository name;
            pass the actual repo name (or update the default) before use.

    Returns: HuggingFace DatasetDict (the result of ``load_dataset(repo_id)``)
    """
    # NOTE: the default repo_id is a placeholder -- replace with the actual repo name.
    return load_dataset(repo_id)

def load_tellmewhy(repo_id: str = "your-dataset-repo/tellmewhy"):
    """
    Load the TellMeWhy dataset.

    Args:
        repo_id: Dataset identifier passed unchanged to ``load_dataset``.
            The default is a placeholder, not a real repository name;
            pass the actual repo name (or update the default) before use.

    Returns: HuggingFace DatasetDict (the result of ``load_dataset(repo_id)``)
    """
    # NOTE: the default repo_id is a placeholder -- replace with the actual repo.
    return load_dataset(repo_id)

def load_wikibio():
    """
    Fetch the WikiBio dataset via ``load_dataset("wiki_bio")``.

    Takes no arguments.
    Returns: HuggingFace DatasetDict
    """
    return load_dataset("wiki_bio")

def load_all():
    """
    Load every dataset this module knows about.

    Calls load_longra, load_tellmewhy and load_wikibio in that order and
    returns a dict mapping "longra", "tellmewhy" and "wikibio" to the
    respective loader's result.
    """
    # Ordered (key, loader) pairs; the comprehension invokes them in sequence.
    loaders = (
        ("longra", load_longra),
        ("tellmewhy", load_tellmewhy),
        ("wikibio", load_wikibio),
    )
    return {key: loader() for key, loader in loaders}

if __name__ == "__main__":
    # Smoke test: load every dataset and print one summary line per entry.
    for name, ds in load_all().items():
        print(f"{name} loaded: {ds}")
